********************************************************************************
********************************************************************************
* PROGRAM:		randomization_inference
* Project:		Stress, Ethnicity, and Prosocial Behavior
* Author:		Moritz Poll (moritz.poll@brown.edu)
* PIs:			Johannes Haushofer, Sara Lowes, Abednego Musau, David Ndetei, 
*				Nathan Nunn, Moritz Poll, Nancy Qian
* Purpose:		Calculates randomization inference p-values
********************************************************************************
********************************************************************************
* In the pre-analysis plan, we commit to the following:
* "We will also check robustness to alternative standard errors, including [...]
* standard errors calculated using randomization inference."
********************************************************************************

global noisyregs "" // noisily
set more off
set trace off
set seed 42
clear all
timer clear 1
timer on 1

global reps 10000
if $reps >= 100 set maxvar `=max(`=2*$reps+1000',5000)'
if $reps >= 1000 set maxvar 32767

* This script may run for several days, so it pauses daily at a specified time.
global pausetime "10:00" // "off"

local generate_dataset = 1
if `generate_dataset' {
********************************************************************************
************************ Perturbed treatment assignments ***********************
********************************************************************************
* There are two levels at which randomization happens:
* 1. Pill assignment is random but balanced at session level in that up to ten
* 	 participants receive either pill, which can be imbalanced if fewer than
*	 twenty participants show up. Up to session 6, 30 pills were assigned 15/15.
*	 From session 7 onwards, it is 20 pills into 10/10.
* 2. At the decision level, participants are randomly assigned partners of
*	 different characteristics.
********************************************************************************

********************************************************************************
****************************** Pill assignment *********************************
********************************************************************************
* At "full" session level, since pills were given to half the session before we
* knew who would attend.
********************************************************************************

noisily dis as input "Start: Randomization Inference based on $reps repetitions"
set obs 119
gen int session = _n
expand 30 if session <= 6
expand 20 if session >  6
bysort session: gen int subject = _n

gen strL condensed_treatsim = ""
forvalues r = 1 / $reps {
	tempvar pill_order`r'
	gen `pill_order`r'' = runiform()
	sort session `pill_order`r''
	by session: gen byte treatsim`r' = (_n > _N/2)
	drop `pill_order`r''
	replace condensed_treatsim = condensed_treatsim + string(treatsim`r')
}
sort session subject
label value treatsim* treat

********************************************************************************
*********************** Reduce to actual participants **************************
********************************************************************************

preserve
use "$cleandata_dir/HIO_for_analysis_wide.dta", clear
keep session subject demo_mothertongue
tempfile actual_participants
save `actual_participants'
restore
merge 1:1 session subject using `actual_participants', assert(master match) keep(match) nogen

save "$estimates_dir/ri_pill_assignment_reps${reps}.dta", replace
drop treatsim*
noisily dis as text "$reps random pill treatments assigned."

********************************************************************************
*************************** Matching participants ******************************
********************************************************************************

expand 6
bysort session subject: gen int round = _n
order round, after(subject)
tempvar tribe_order
recode round (1 2 4 5 = 1) (3 6 = 0), gen(`tribe_order')

noisily dis as text "Assigning $reps random pairings: " _continue
noisily dis as input "cyd: " _continue
forvalues r = 1 / $reps { // CYDA
	tempvar cyd_order cyd_left
	gen `cyd_order' = runiform() // Assign a random order to the six possible tribe-pairings
	gen byte `cyd_left'  = floor(2*runiform())
	gen int cydAmothertonguesim`r' = .
	gen int cydBmothertonguesim`r' = .
	sort session subject `cyd_order'
	by session subject: replace cydAmothertonguesim`r' = 1 if  `cyd_left' & _n == 1
	by session subject: replace cydBmothertonguesim`r' = 2 if  `cyd_left' & _n == 1
	by session subject: replace cydAmothertonguesim`r' = 1 if  `cyd_left' & _n == 2
	by session subject: replace cydBmothertonguesim`r' = 3 if  `cyd_left' & _n == 2
	by session subject: replace cydAmothertonguesim`r' = 1 if  `cyd_left' & _n == 3
	by session subject: replace cydBmothertonguesim`r' = 4 if  `cyd_left' & _n == 3
	by session subject: replace cydAmothertonguesim`r' = 2 if  `cyd_left' & _n == 4
	by session subject: replace cydBmothertonguesim`r' = 3 if  `cyd_left' & _n == 4
	by session subject: replace cydAmothertonguesim`r' = 2 if  `cyd_left' & _n == 5
	by session subject: replace cydBmothertonguesim`r' = 4 if  `cyd_left' & _n == 5
	by session subject: replace cydAmothertonguesim`r' = 3 if  `cyd_left' & _n == 6
	by session subject: replace cydBmothertonguesim`r' = 4 if  `cyd_left' & _n == 6

	by session subject: replace cydAmothertonguesim`r' = 2 if !`cyd_left' & _n == 1
	by session subject: replace cydBmothertonguesim`r' = 1 if !`cyd_left' & _n == 1
	by session subject: replace cydAmothertonguesim`r' = 3 if !`cyd_left' & _n == 2
	by session subject: replace cydBmothertonguesim`r' = 1 if !`cyd_left' & _n == 2
	by session subject: replace cydAmothertonguesim`r' = 4 if !`cyd_left' & _n == 3
	by session subject: replace cydBmothertonguesim`r' = 1 if !`cyd_left' & _n == 3
	by session subject: replace cydAmothertonguesim`r' = 3 if !`cyd_left' & _n == 4
	by session subject: replace cydBmothertonguesim`r' = 2 if !`cyd_left' & _n == 4
	by session subject: replace cydAmothertonguesim`r' = 4 if !`cyd_left' & _n == 5
	by session subject: replace cydBmothertonguesim`r' = 2 if !`cyd_left' & _n == 5
	by session subject: replace cydAmothertonguesim`r' = 4 if !`cyd_left' & _n == 6
	by session subject: replace cydBmothertonguesim`r' = 3 if !`cyd_left' & _n == 6
	
	drop `cyd_order' `cyd_left'
	if mod(`r',50) == 0 noisily dis as text _continue "`r' "
} // Repetitions
preserve
drop cydBmothertonguesim*
save "$estimates_dir/ri_match_assignment_cydA_reps${reps}.dta", replace
restore
drop cydAmothertonguesim*
save "$estimates_dir/ri_match_assignment_cydB_reps${reps}.dta", replace
drop cyd*mothertonguesim*

* DG, TG1, TG2, Exit
foreach game in "dg" "tg1" "tg2" "exit" {
	noisily dis as input "`game': " _continue
	forvalues r = 1 / $reps {
		tempvar `game'_order
		gen ``game'_order' = runiform()
		sort session subject `tribe_order' ``game'_order'
		by session subject `tribe_order': gen int `game'mothertonguesim`r' = _n
		replace `game'mothertonguesim`r' = 	      `game'mothertonguesim`r' - 2 if `tribe_order' == 0
		drop ``game'_order'
		if mod(`r',50) == 0 noisily dis as text _continue "`r' "
	} // Games
	preserve
	drop `tribe_order'
	save "$estimates_dir/ri_match_assignment_`game'_reps${reps}.dta", replace
	restore
	drop `game'mothertonguesim*
}
noisily dis

********************************************************************************
***************************** Reshape and append *******************************
********************************************************************************
* Since Stata/SE cannot handle more than 32,000 variables at once, the process
* was broken up into pieces by game which are then appended to never exceed the
* maxvar option.
********************************************************************************

** Reshape
foreach game in cydA cydB dg tg1 tg2 exit {
	use "$estimates_dir/ri_match_assignment_`game'_reps${reps}.dta", clear
	capture rename dg* *
	capture rename cydA* *
	capture rename cydB* *
	capture rename tg1* *
	capture rename tg2* * // There are five TG2 rounds for different amounts.
	capture rename exit* * // Games 10, 11 and 12 are the social proximity components.
	if "`game'" == "dg" 	gen game = 1
	if "`game'" == "cydA" 	gen game = 2
	if "`game'" == "cydB" 	gen game = 3
	if "`game'" == "tg1" 	gen game = 4
	if "`game'" == "tg2" 	gen game = 5
	if "`game'" == "exit" 	gen game = 13
	
	if "`game'" == "exit" 	drop if game == 13 & (round == 3 | round == 6) // Social proximity only has four rounds
	if "`game'" == "exit" 	recode round (4=3) (5=4) if game == 13
	sort session subject game round
	save "$estimates_dir/ri_match_assignment_`game'_reps${reps}.dta", replace
}

** Append
use "$estimates_dir/ri_pill_assignment_reps${reps}.dta", clear
merge 1:m session subject using "$estimates_dir/ri_match_assignment_dg_reps${reps}.dta", assert(3) nogen
append using "$estimates_dir/ri_match_assignment_cydA_reps${reps}.dta"
append using "$estimates_dir/ri_match_assignment_cydB_reps${reps}.dta"
append using "$estimates_dir/ri_match_assignment_tg1_reps${reps}.dta"
append using "$estimates_dir/ri_match_assignment_tg2_reps${reps}.dta"
append using "$estimates_dir/ri_match_assignment_exit_reps${reps}.dta"
order session subject game round demo_mothertongue, first

********************************************************************************
********** Replace the 0/-1 mothertongue by same and random non-same ***********
********************************************************************************

gen diff_mothertongue1 = 1*(demo_mothertongue != 1) + 2*(demo_mothertongue == 1)
gen diff_mothertongue2 = 2*(demo_mothertongue == 3 | demo_mothertongue == 4) + 3*(demo_mothertongue == 1 | demo_mothertongue == 2)
gen diff_mothertongue3 = 3*(demo_mothertongue == 4) + 4*(demo_mothertongue != 4)
gen rand_diff_mothertongue = ceil(3*runiform())
forvalues r = 1 / $reps {
	replace mothertonguesim`r' = demo_mothertongue  if mothertonguesim`r' == 0
	replace mothertonguesim`r' = diff_mothertongue1 if mothertonguesim`r' == -1 & rand_diff_mothertongue == 1
	replace mothertonguesim`r' = diff_mothertongue2 if mothertonguesim`r' == -1 & rand_diff_mothertongue == 2
	replace mothertonguesim`r' = diff_mothertongue3 if mothertonguesim`r' == -1 & rand_diff_mothertongue == 3
}
drop diff_mothertongue1 diff_mothertongue2 diff_mothertongue3 rand_diff_mothertongue

save "$estimates_dir/ri_pill_and_mothertongue_assignment_reps${reps}.dta", replace

********************************************************************************
******** Prevent simulated subjects from playing the same partner twice ********
********************************************************************************

use "$estimates_dir/ri_pill_and_mothertongue_assignment_reps${reps}.dta", clear
drop treatsim* // Retain only the condensed pill treatment assignment to keep variable count down.
set more off
noisily dis as text "Adding age group and gender; Removing duplicates: " _continue
tempvar dup
forvalues r = 1 / $reps {
	gen int agesim`r' 		= ceil(3*runiform())
	gen byte gendersim`r' 	= ceil(2*runiform())
	duplicates tag session subject game mothertonguesim`r' agesim`r' gendersim`r' if game != 13, gen(`dup')
	noisily sum `dup', meanonly
	while r(mean) > 0 {
		replace agesim`r' 		= ceil(3*runiform()) if `dup'
		replace gendersim`r' 	= ceil(2*runiform()) if `dup'
		drop `dup'
		duplicates tag session subject game mothertonguesim`r' agesim`r' gendersim`r' if game != 13, gen(`dup')
		noisily sum `dup', meanonly
	}
	drop `dup'
	if mod(`r',50) == 0 noisily dis as text _continue "`r' "
}
noisily dis

********************************************************************************
*********************** Create additional rounds for TG2 ***********************
********************************************************************************

expand 5 if game == 5, gen(aux)
replace aux = 0 if game == 5
bysort session subject game round aux: replace aux = _n
replace aux = aux - 1
replace game = game + aux
drop aux
sort session subject game round

tempfile sim_assignment
save `sim_assignment'
noisily dis as text "Simulations successfully reshaped to xlong format."

use "$cleandata_dir/HIO_for_analysis_xlong.dta", clear
drop if game_str == "likely_to_be_friends" |  game_str == "trust" |  game_str == "closeness" |  game_str == "demand"

merge m:1 session subject game round using `sim_assignment', keep(master matched) nogen
noisily dis as text "Simulations successfully merged into data set."

********************************************************************************
*********************** Generate characteristics matches ***********************
********************************************************************************

noisily dis as input "Turning characteristics into 'same'-dummy matches: "
foreach control in _luo _kikuyu _luhya _middleaged _old _female {
	gen strL condensed`control' = ""
}
forvalues r = 1 / $reps { // This process starts fast and as the variables grow longer becomes really slow. Might be faster doing each individually.
	gen _same_ethsim`r' = (mothertonguesim`r' == demo_mothertongue)
	gen _same_agesim`r' = (agesim`r' == demo_age)
	gen _same_sexsim`r'	= (gendersim`r' == demo_gender)
	replace condensed_luo 			= condensed_luo + string(mothertonguesim`r' == 1)
	replace condensed_kikuyu		= condensed_kikuyu + string(mothertonguesim`r' == 2)
	replace condensed_luhya			= condensed_luhya + string(mothertonguesim`r' == 3)
	replace condensed_middleaged 	= condensed_middleaged + string(agesim`r' == 2)
	replace condensed_old			= condensed_old + string(agesim`r' == 3)
	replace condensed_female 		= condensed_female + string(gendersim`r' == 2)
	if $reps > 1000 drop mothertonguesim`r' agesim`r' gendersim`r' // We are doubling the number of variables, so we'll want to not overburden Stata.
	if mod(`r',50) == 0 noisily dis as text _continue "`r' "
}
noisily dis
save "$estimates_dir/ri_final_reps${reps}.dta", replace

********************************************************************************
************************ Difference CYD characteristics ************************
********************************************************************************

* Re-merge CYD in wide format and form differences
noisily dis as input "Reshaping for CYD specification."
set more off
foreach var in _same_eth _same_age _same_sex {
use "$estimates_dir/ri_final_reps${reps}.dta", clear
keep if game == 3
keep idround game `var'sim*
rename _* _*B
tempfile cydB
save `cydB'
use "$estimates_dir/ri_final_reps${reps}.dta", clear
keep if game == 2
keep idround game `var'sim*
rename _* _*A 
merge 1:1 idround using `cydB', nogen
erase `cydB'
forvalues r = 1 / $reps {
	gen `var'sim`r' = `var'sim`r'A - `var'sim`r'B
	drop `var'sim`r'A `var'sim`r'B
}
tempfile cyd`var'
save `cyd`var''
}

use "$estimates_dir/ri_final_reps${reps}.dta", clear
keep if game == 3
keep idround game condensed*
rename condensed* condensed*B
tempfile cydB
save `cydB'
use "$estimates_dir/ri_final_reps${reps}.dta", clear
keep if game == 2
keep idround game condensed*
rename condensed* condensed*A
merge 1:1 idround using `cydB', nogen
erase `cydB'
foreach control in _luo _kikuyu _luhya _middleaged _old _female {
	gen strL condensed`control' = ""
}
forvalues r = 1 / $reps {
	foreach var in _luo _kikuyu _luhya _middleaged _old _female {
		gen A = substr(condensed`var'A,`r',1)
		gen B = substr(condensed`var'B,`r',1)
		destring A B, replace
		replace condensed`var' = condensed`var' + string(A-B+1) // This condensed variable cannot handle the two digits "-1", so I later subtract 1 again.
		drop A B
	}
}
foreach var in _treatsim _luo _kikuyu _luhya _middleaged _old _female {
	drop condensed`var'A condensed`var'B
}
tempfile cyd_condensed
save `cyd_condensed'
use "$estimates_dir/ri_final_reps${reps}.dta", clear
merge 1:1 idround game using `cyd_same_eth',  nogen update replace
merge 1:1 idround game using `cyd_same_age',  nogen update replace
merge 1:1 idround game using `cyd_same_sex',  nogen update replace
merge 1:1 idround game using `cyd_condensed', nogen update replace
drop if game == 3

********************************************************************************
*************************** Save simulated data set ****************************
********************************************************************************

save "$estimates_dir/ri_final_reps${reps}.dta", replace
noisily dis as input "Successfully set up data set."

}
********************************************************************************
********************** Obtain sampling-inference p-values **********************
********************************************************************************

use "$estimates_dir/ri_final_reps${reps}.dta", clear // ~ 16GB for R = 10,000
foreach matrix in diff_eth pill eth_Delta {
	matrix define `matrix' = . , . \ . , . \ . , . \ . , . \ . , .
	matrix colnames `matrix' = Sampling_p Randomization_Inference_p
	matrix rownames `matrix' = $main_table_vars
}

foreach game in "dg" "tg1" "tg2" "social_proximity" {
	estimates use "$estimates_dir/`game'_spec1"
		
	* Sample-specific weighting
	$noisyregs sum treatment if game_str == "`game'"
	local share_hydro = r(mean)
	$noisyregs sum _same_eth if game_str == "`game'"
	local share_same_eth = r(mean)
	$noisyregs sum _same_sex if game_str == "`game'"
	local share_same_sex = r(mean)
	$noisyregs sum _same_age if game_str == "`game'"
	local share_same_age = r(mean)

	** Pooled estimators (at the mean)
	lincom _b[1._same_eth] + `share_hydro'*_b[1._same_eth#1.treatment]
	matrix diff_eth[rownumb(diff_eth,"`game'"),1] = 2*ttail(r(df),abs(r(estimate)/r(se)))

	lincom _b[1.treatment] + `share_same_eth'*_b[1._same_eth#1.treatment] + ///
							 `share_same_sex'*_b[1._same_sex#1.treatment] + ///
							 `share_same_age'*_b[1._same_age#1.treatment]
	matrix pill[rownumb(pill,"`game'"),1] = 2*ttail(r(df),abs(r(estimate)/r(se)))
	** Interactions
	lincom _b[1._same_eth#1.treatment]
	matrix eth_Delta[rownumb(eth_Delta,"`game'"),1] = 2*ttail(r(df),abs(r(estimate)/r(se)))
}
estimates use "$estimates_dir/cyd_spec1"
local game "cyd"
** Pooled estimators
lincom _b[_same_eth] + `share_hydro'*_b[_same_eth#1.treatment]
matrix diff_eth[rownumb(diff_eth,"cyd"),1] = 2*ttail(r(df),abs(r(estimate)/r(se)))
** Interactions
lincom _b[_same_eth#1.treatment]
matrix eth_Delta[rownumb(eth_Delta,"cyd"),1] = 2*ttail(r(df),abs(r(estimate)/r(se)))

********************************************************************************
************** Main specification 1 under randomization inference **************
********************************************************************************

rename treatment realized_treatment
foreach var in _same_eth _same_age _same_sex _luo _kikuyu _luhya _middleaged _old _female {
	rename `var' realized`var'
	rename cyd_diff`var' realized_cyd_diff`var'
}

replace round = round + 0.1 if game == 5
replace round = round + 0.2 if game == 6
replace round = round + 0.3 if game == 7
replace round = round + 0.4 if game == 8
replace round = round + 0.5 if game == 9

gen str1 aux = ""
forvalues spec = 1 / 1 { // Specifications 1 (between) and 2 (within)
	foreach game in $main_table_vars { // Games $main_table_vars
		noisily dis as result "Running regression specification `spec' `game': "
		local fe = ""
		local if_restriction ""
		local cluster cluster(ID)
		
		if "`game'" != "social_proximity" & "`game'" != "cyd" local y "share"
		else local y "decision"
		
		preserve
		
		if "`game'" == "allocations" {
			keep if game_str == "dg" | game_str == "tg1" | game_str == "tg2"
			if `spec' == 2 {
				replace round =   -round if game_str == "tg1"
				replace round = 10*round if game_str == "tg2" // Make rounds integer
			}
		}
		else keep if game_str == "`game'"
		if "`game'" == "tg2" replace round = 10*round // Make rounds integer
		
		if `spec' == 2 & "`game'" != "cyd" xtset ID round
		
		foreach matrix in diff_eth pill eth_Delta {
			local count`matrix' = 0
		}
		if "`game'" == "cyd" local countpill = .
		if "`game'" == "cyd" local counthydro_diff = .
		if "`game'" == "cyd" local counthydro_same = .
		forvalues r = 1 / $reps { // Simulations
		
			replace aux = substr(condensed_treatsim,`r',1)
			destring aux, gen(treatment)
			
			foreach var in _luo _kikuyu _luhya _middleaged _old _female {
				replace aux = substr(condensed`var',`r',1)
				destring aux, gen(`var')
				gen int cyd_diff`var' = `var' - 1 if game_str == "cyd"
			}
		
			if "`game'" != "cyd" {
				rename _same_ethsim`r' _same_eth
				rename _same_agesim`r' _same_age
				rename _same_sexsim`r' _same_sex
			}
			if "`game'" == "cyd" {
				rename _same_ethsim`r' cyd_diff_same_eth
				rename _same_agesim`r' cyd_diff_same_age
				rename _same_sexsim`r' cyd_diff_same_sex
			}
			local x "eth"
			include "$do_analysis_dir/main`spec'.do"
			if "`game'" != "cyd" {
				rename _same_eth _same_ethsim`r'
				rename _same_age _same_agesim`r'
				rename _same_sex _same_sexsim`r'
			}
			if "`game'" == "cyd" {
				rename cyd_diff_same_eth _same_ethsim`r'
				rename cyd_diff_same_age _same_agesim`r'
				rename cyd_diff_same_sex _same_sexsim`r'
			}
			if "`game'" != "cyd" {
				lincom _b[1._same_eth] + `share_hydro'*_b[1._same_eth#1.treatment]
				if diff_eth[rownumb(diff_eth,"`game'"),1] > 2*ttail(r(df),abs(r(estimate)/r(se))) local ++countdiff_eth
				
				lincom _b[1.treatment] + `share_same_eth'*_b[1._same_eth#1.treatment] + ///
										 `share_same_sex'*_b[1._same_sex#1.treatment] + ///
										 `share_same_age'*_b[1._same_age#1.treatment]
				if pill[rownumb(pill,"`game'"),1] > 2*ttail(r(df),abs(r(estimate)/r(se))) local ++countpill
				
				** Interactions
				lincom _b[1._same_eth#1.treatment]
				if eth_Delta[rownumb(eth_Delta,"`game'"),1] > 2*ttail(r(df),abs(r(estimate)/r(se))) local ++counteth_Delta
			}
			else { // CYD
				** Pooled estimators
				lincom _b[_same_eth] + `share_hydro'*_b[_same_eth#1.treatment]
				if diff_eth[rownumb(diff_eth,"cyd"),1] > 2*ttail(r(df),abs(r(estimate)/r(se))) local ++countdiff_eth
				** Interactions
				lincom _b[_same_eth#1.treatment]
				if eth_Delta[rownumb(eth_Delta,"cyd"),1] > 2*ttail(r(df),abs(r(estimate)/r(se))) local ++counteth_Delta
			}
			drop treatment _luo _kikuyu _luhya _middleaged _old _female cyd_diff_luo cyd_diff_kikuyu cyd_diff_luhya cyd_diff_middleaged cyd_diff_old cyd_diff_female // so it can be re-generated by destring
			if mod(`r',50) == 0 noisily dis as text _continue "`r' "
			if substr("$S_TIME",1,5) == "$pausetime" pause Do you want to run this program for another 24h? Press q when you are ready to resume the program.
		} // Repetitions
		foreach matrix in diff_eth pill eth_Delta { // Calculate RI exact p-value
			matrix `matrix'[rownumb(`matrix',"`game'"),2] = `count`matrix''/$reps
		}
		restore
		noisily dis
	} // Games

	noisily mat list diff_eth 
	noisily mat list pill
	noisily mat list eth_Delta
} // Specifications

drop *
set obs 1

local spec = 1

foreach matrix in diff_eth pill eth_Delta {
	capture drop *
	svmat `matrix', names(col) 
	save "$estimates_dir/ri_`matrix'_spec`spec'_reps${reps}.dta", replace 
}

noisily dis as input "End: Randomization Inference"
